import pandas as pd
import soundfile as sf

# Draw a fixed-size random subset of rows from a TSV manifest and write the
# subset to a new TSV in the current working directory.
org_tsv = "/home/shaonian/SED/SED/configs/dataset/desed_tsv/train_unlabeled_id_Y800M_thd95_200h.tsv"

# Single source of truth for the subset size; the row count, the log message
# and the output filename are all derived from these values.
TARGET_HOURS = 180
# Assumed clip length in seconds (per the original author's note). Durations
# are NOT verified against the audio files here -- TODO confirm.
CLIP_SECONDS = 10
# Fixed seed so repeated runs select the same rows.
RANDOM_SEED = 42

all_files = pd.read_csv(org_tsv, sep="\t")
print(f"Total files in original TSV: {len(all_files)}")

# Number of rows needed to cover TARGET_HOURS at CLIP_SECONDS per row.
# Integer arithmetic avoids the float round-trip of `hours * 3600 / seconds`.
sample_size = TARGET_HOURS * 3600 // CLIP_SECONDS

# Sampling without replacement cannot return more rows than exist; fail early
# with a message that names the file and both counts instead of relying on the
# generic error pandas raises from DataFrame.sample.
if sample_size > len(all_files):
    raise ValueError(
        f"Cannot sample {sample_size} rows ({TARGET_HOURS} h at "
        f"{CLIP_SECONDS} s per file) from {org_tsv}: "
        f"only {len(all_files)} rows available"
    )

sampled_files = all_files.sample(n=sample_size, random_state=RANDOM_SEED)
print(f"Sampled {len(sampled_files)} files for {TARGET_HOURS} hours")

# Save the sampled rows to a new TSV (relative path -> current working dir).
# The DataFrame index is not written; rows keep the shuffled order that
# DataFrame.sample returned.
sampled_tsv = f"train_unlabeled_id_Y800M_thd95_{TARGET_HOURS}h.tsv"
sampled_files.to_csv(sampled_tsv, sep="\t", index=False)
